import_data("jake_gyllenhaal") 
NAs introduced by coercion
filmes = read_imported_data()
filmes %>% 
    glimpse()
Observations: 20
Variables: 5
$ avaliacao  <int> 92, 68, 73, 52, 73, 59, 82, 85, 92, 49, 35, 64, 47, 90, 87, 61, 62, 44, ...
$ filme      <chr> "Stronger", "Life", "Nocturnal Animals", "Demolition", "Everest", "South...
$ papel      <chr> "Jeff Bauman", "David Jordan", "Tony HastingsEdward Sheffield", "Davis M...
$ bilheteria <dbl> 4.2, 30.2, 10.7, 1.7, 46.6, 42.4, 61.0, 39.1, 54.7, 33.3, 90.8, 28.6, 9....
$ ano        <int> 2017, 2017, 2016, 2016, 2015, 2015, 2013, 2012, 2011, 2010, 2010, 2009, ...

Data Overview

Bilheteria

filmes %>% 
    ggplot(aes(x = ano, y = bilheteria)) + 
    geom_point(size = 4, color = paleta[1]) 

filmes %>% 
    ggplot(aes(x = bilheteria)) + 
    geom_histogram(binwidth = 10, boundary = 0, 
                   fill = "grey", color = "black") + 
    geom_rug(size = .5) +
    scale_x_continuous(breaks=seq(0,210,10))

filmes %>% 
    group_by(filme) %>%
    ggplot(aes(sample=bilheteria)) + 
        stat_qq()

p = filmes %>% 
    ggplot(aes(x = "", y = bilheteria, label = filme)) + 
    geom_jitter(width = .05, alpha = .3, size = 3) + 
    labs(x = "")
ggplotly(p) %>% 
    layout(autosize = F)

Avaliação

filmes %>% 
    ggplot(aes(x = ano, y = avaliacao)) + 
    geom_point(size = 4, color = paleta[1])  +
    scale_y_continuous(limits = c(0, 100))

filmes %>% 
    ggplot(aes(x = avaliacao)) + 
    geom_histogram(binwidth = 10, boundary = 0, 
                   fill = paleta[3], color = "black") + 
    geom_rug(size = .5) 

filmes %>% 
    group_by(filme) %>%
    ggplot(aes(sample=avaliacao)) + 
    stat_qq() 

p = filmes %>% 
    ggplot(aes(x = "", y = avaliacao, label = filme)) + 
    geom_jitter(width = .05, alpha = .3, size = 3) + 
    labs(x = "")
ggplotly(p) %>% 
    layout(autosize = F)

Agrupamento hierárquico

agrupamento_h = filmes %>% 
    mutate(nome = paste0(filme, " (av=", avaliacao, ")")) %>% 
    as.data.frame() %>% 
    column_to_rownames("filme") %>% 
    select(avaliacao) %>%
    dist(method = "euclidian") %>% 
    hclust(method = "ward.D")
ggdendrogram(agrupamento_h, rotate = T, size = 2, theme_dendro = F) + 
    labs(y = "Dissimilaridade", x = "", title = "Dendrograma")

get_grupos <- function(agrupamento, num_grupos){
    agrupamento %>% 
        cutree(num_grupos) %>% 
        as.data.frame() %>% 
        mutate(label = rownames(.)) %>% 
        gather(key =  "k", value = "grupo", -label) %>% 
        mutate(grupo = as.character(grupo))
}
atribuicoes = get_grupos(agrupamento_h, num_grupos = 1:6)
atribuicoes = atribuicoes %>% 
    left_join(filmes, by = c("label" = "filme"))
atribuicoes %>% 
    ggplot(aes(x = "Filmes", y = avaliacao, colour = grupo)) + 
    geom_jitter(width = .02, height = 0, size = 1.6, alpha = .6) + 
    facet_wrap(~ paste(k, " grupos")) + 
    scale_color_brewer(palette = "Dark2")

k_escolhido = 3
plot_atrib <-
    atribuicoes %>% 
    filter(k == k_escolhido) %>% 
    ggplot(aes(x = reorder(label, avaliacao), y = avaliacao, colour = grupo)) + 
    geom_jitter(width = .02, height = 0, size = 3, alpha = .6) + 
    facet_wrap(~ paste(k, " grupos")) + 
    scale_color_brewer(palette = "Dark2") + 
    labs(x = "", y = "Avaliação RT") + 
    coord_flip()
ggplotly(plot_atrib) %>%
    layout(autosize = F)

Com duas dimensões

agrupamento_h_2d = filmes %>%
   mutate(bilheteria = log10(bilheteria)) %>%
   mutate_at(vars("avaliacao", "bilheteria"), funs(scale)) %>%
   column_to_rownames("filme") %>%
   select("avaliacao", "bilheteria") %>%
   dist(method = "euclidean") %>%
   hclust(method = "ward.D")
Setting row names on a tibble is deprecated.
ggdendrogram(agrupamento_h_2d, rotate = TRUE, theme_dendro = F)

filmes2 <- filmes %>%
    mutate(bilheteria = log10(bilheteria))
plota_hclusts_2d(agrupamento_h_2d,
                filmes2,
                c("avaliacao", "bilheteria"),
                linkage_method = "ward.D", 
                ks = 1:6,
                palette = "Dark2") + 
    scale_y_log10()

atribuicoes = get_grupos(agrupamento_h_2d, num_grupos = 1:6)
atribuicoes = atribuicoes %>% 
    filter(k == 3) %>%
    mutate(filme = label) %>% 
    left_join(filmes, by = "filme")
z <- atribuicoes %>%
    ggplot(aes(x = avaliacao,
               y = bilheteria,
               colour = grupo,
               text = paste(
                    "Filme:", filme,
                    "\nBilheteria:", bilheteria,"m\n",
                    "Avaliação:", avaliacao))) + 
    geom_jitter(width = .02, height = 0, size = 3, alpha = .6) + 
    facet_wrap(~ paste(k, " grupos")) + 
    scale_color_brewer(palette = "Dark2") +
    scale_y_log10()
ggplotly(z, tooltip = "text") %>%
    layout(autosize = F)
LS0tCnRpdGxlOiAiVGlwb3MgZGUgZmlsbWUgZGUgSmFrZSBHeWxsZW5oYWFsIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazoKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwogIGh0bWxfZG9jdW1lbnQ6CiAgICBkZl9wcmludDogcGFnZWQKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IHllcwotLS0KCmBgYHtyIGVjaG89RkFMU0UsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGhlcmUpCmxpYnJhcnkoY2x1c3RlcikKbGlicmFyeShwbG90bHkpCmxpYnJhcnkoZ2dkZW5kcm8pCgpzb3VyY2UoaGVyZTo6aGVyZSgiY29kZS9saWIuUiIpKQpzb3VyY2UoaGVyZTo6aGVyZSgiY29kZS9wbG90YV9zb2x1Y29lc19oY2x1c3QuUiIpKQoKdGhlbWVfc2V0KHRoZW1lX3JlcG9ydCgpKQoKa25pdHI6Om9wdHNfY2h1bmskc2V0KHRpZHkgPSBGQUxTRSwKICAgICAgICAgICAgICAgICAgICAgIGZpZy53aWR0aCA9IDYsCiAgICAgICAgICAgICAgICAgICAgICBmaWcuaGVpZ2h0ID0gNSwKICAgICAgICAgICAgICAgICAgICAgIGVjaG8gPSBUUlVFKQoKcGFsZXRhID0gYygiIzQwNEU0RCIsCiAgICAgICAgICAgIiM5MkRDRTUiLAogICAgICAgICAgICIjOTM4QkExIiwKICAgICAgICAgICAiIzJEMzE0MiIsCiAgICAgICAgICAgIiNGNDc0M0IiKQpgYGAKCmBgYHtyIHJlYWR9CmltcG9ydF9kYXRhKCJqYWtlX2d5bGxlbmhhYWwiKSAKZmlsbWVzID0gcmVhZF9pbXBvcnRlZF9kYXRhKCkKYGBgCgpgYGB7cn0KZmlsbWVzICU+JSAKICAgIGdsaW1wc2UoKQpgYGAKCgojIyBEYXRhIE92ZXJ2aWV3CgojIyMgQmlsaGV0ZXJpYQoKYGBge3J9CmZpbG1lcyAlPiUgCiAgICBnZ3Bsb3QoYWVzKHggPSBhbm8sIHkgPSBiaWxoZXRlcmlhKSkgKyAKICAgIGdlb21fcG9pbnQoc2l6ZSA9IDQsIGNvbG9yID0gcGFsZXRhWzFdKSAKYGBgCgoKCmBgYHtyfQpmaWxtZXMgJT4lIAogICAgZ2dwbG90KGFlcyh4ID0gYmlsaGV0ZXJpYSkpICsgCiAgICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDEwLCBib3VuZGFyeSA9IDAsIAogICAgICAgICAgICAgICAgICAgZmlsbCA9ICJncmV5IiwgY29sb3IgPSAiYmxhY2siKSArIAogICAgZ2VvbV9ydWcoc2l6ZSA9IC41KSArCiAgICBzY2FsZV94X2NvbnRpbnVvdXMoYnJlYWtzPXNlcSgwLDIxMCwxMCkpCmBgYAoKYGBge3J9CmZpbG1lcyAlPiUgCiAgICBncm91cF9ieShmaWxtZSkgJT4lCiAgICBnZ3Bsb3QoYWVzKHNhbXBsZT1iaWxoZXRlcmlhKSkgKyAKICAgICAgICBzdGF0X3FxKCkKYGBgCgpgYGB7cn0KcCA9IGZpbG1lcyAlPiUgCiAgICBnZ3Bsb3QoYWVzKHggPSAiIiwgeSA9IGJpbGhldGVyaWEsIGxhYmVsID0gZmlsbWUpKSArIAogICAgZ2VvbV9qaXR0ZXIod2lkdGggPSAuMDUsIGFscGhhID0gLjMsIHNpemUgPSAzKSArIAogICAgbGFicyh4ID0gIiIpCgpnZ3Bsb3RseShwKSAlPiUgCiAgICBsYXlvdXQoYXV0b3NpemUgPSBGKQoKYGBgCgojIyMgQXZhbGlhw6fDo28KCmBgYHtyfQpmaWxtZXMgJT4lIAogICAgZ2dwbG90KGFlcyh4ID0gYW5vLCB5ID0gYXZhbGlhY2FvKSkgKyAKICAgIGdlb21fcG9pbnQoc2l6ZSA9IDQsIGNvbG9yID0gcGFsZXRhWzFdKSAgKwogICAgc2NhbGVfeV9jb250aW51b3VzKGxpbWl0cyA9IGMoMCwgMTAwKSkKYGBgCgpgYGB7cn0KZmlsbWVzICU+JSAKICAgIGdncGxvdChhZXMoeCA9IGF2YWxpYWNhbykpICsgCiAgICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IDEwLCBib3VuZGFyeSA9IDAsIAogICAgICAgICAgICAgICAgICAgZmlsbCA9IHBhbGV0YVszXSwgY29sb3IgPSAiYmxhY2siKSArIAogICAgZ2VvbV9ydWcoc2l6ZSA9IC41KSAKYGBgCgpgYGB7cn0KZmlsbWVzICU+JSAKICAgIGdyb3VwX2J5KGZpbG1lKSAlPiUKICAgIGdncGxvdChhZXMoc2FtcGxlPWF2YWxpYWNhbykpICsgCiAgICBzdGF0X3FxKCkgCmBgYAoKYGBge3J9CnAgPSBmaWxtZXMgJT4lIAogICAgZ2dwbG90KGFlcyh4ID0gIiIsIHkgPSBhdmFsaWFjYW8sIGxhYmVsID0gZmlsbWUpKSArIAogICAgZ2VvbV9qaXR0ZXIod2lkdGggPSAuMDUsIGFscGhhID0gLjMsIHNpemUgPSAzKSArIAogICAgbGFicyh4ID0gIiIpCgpnZ3Bsb3RseShwKSAlPiUgCiAgICBsYXlvdXQoYXV0b3NpemUgPSBGKQoKYGBgCgojIyBBZ3J1cGFtZW50byBoaWVyw6FycXVpY28KCmBgYHtyfQphZ3J1cGFtZW50b19oID0gZmlsbWVzICU+JSAKICAgIG11dGF0ZShub21lID0gcGFzdGUwKGZpbG1lLCAiIChhdj0iLCBhdmFsaWFjYW8sICIpIikpICU+JSAKICAgIGFzLmRhdGEuZnJhbWUoKSAlPiUgCiAgICBjb2x1bW5fdG9fcm93bmFtZXMoImZpbG1lIikgJT4lIAogICAgc2VsZWN0KGF2YWxpYWNhbykgJT4lCiAgICBkaXN0KG1ldGhvZCA9ICJldWNsaWRpYW4iKSAlPiUgCiAgICBoY2x1c3QobWV0aG9kID0gIndhcmQuRCIpCgpnZ2RlbmRyb2dyYW0oYWdydXBhbWVudG9faCwgcm90YXRlID0gVCwgc2l6ZSA9IDIsIHRoZW1lX2RlbmRybyA9IEYpICsgCiAgICBsYWJzKHkgPSAiRGlzc2ltaWxhcmlkYWRlIiwgeCA9ICIiLCB0aXRsZSA9ICJEZW5kcm9ncmFtYSIpCmBgYAoKYGBge3J9CmdldF9ncnVwb3MgPC0gZnVuY3Rpb24oYWdydXBhbWVudG8sIG51bV9ncnVwb3MpewogICAgYWdydXBhbWVudG8gJT4lIAogICAgICAgIGN1dHJlZShudW1fZ3J1cG9zKSAlPiUgCiAgICAgICAgYXMuZGF0YS5mcmFtZSgpICU+JSAKICAgICAgICBtdXRhdGUobGFiZWwgPSByb3duYW1lcyguKSkgJT4lIAogICAgICAgIGdhdGhlcihrZXkgPSAgImsiLCB2YWx1ZSA9ICJncnVwbyIsIC1sYWJlbCkgJT4lIAogICAgICAgIG11dGF0ZShncnVwbyA9IGFzLmNoYXJhY3RlcihncnVwbykpCn0KCmF0cmlidWljb2VzID0gZ2V0X2dydXBvcyhhZ3J1cGFtZW50b19oLCBudW1fZ3J1cG9zID0gMTo2KQoKYXRyaWJ1aWNvZXMgPSBhdHJpYnVpY29lcyAlPiUgCiAgICBsZWZ0X2pvaW4oZmlsbWVzLCBieSA9IGMoImxhYmVsIiA9ICJmaWxtZSIpKQoKYXRyaWJ1aWNvZXMgJT4lIAogICAgZ2dwbG90KGFlcyh4ID0gIkZpbG1lcyIsIHkgPSBhdmFsaWFjYW8sIGNvbG91ciA9IGdydXBvKSkgKyAKICAgIGdlb21faml0dGVyKHdpZHRoID0gLjAyLCBoZWlnaHQgPSAwLCBzaXplID0gMS42LCBhbHBoYSA9IC42KSArIAogICAgZmFjZXRfd3JhcCh+IHBhc3RlKGssICIgZ3J1cG9zIikpICsgCiAgICBzY2FsZV9jb2xvcl9icmV3ZXIocGFsZXR0ZSA9ICJEYXJrMiIpCgpgYGAKCmBgYHtyfQprX2VzY29saGlkbyA9IDMKCnBsb3RfYXRyaWIgPC0KICAgIGF0cmlidWljb2VzICU+JSAKICAgIGZpbHRlcihrID09IGtfZXNjb2xoaWRvKSAlPiUgCiAgICBnZ3Bsb3QoYWVzKHggPSByZW9yZGVyKGxhYmVsLCBhdmFsaWFjYW8pLCB5ID0gYXZhbGlhY2FvLCBjb2xvdXIgPSBncnVwbykpICsgCiAgICBnZW9tX2ppdHRlcih3aWR0aCA9IC4wMiwgaGVpZ2h0ID0gMCwgc2l6ZSA9IDMsIGFscGhhID0gLjYpICsgCiAgICBmYWNldF93cmFwKH4gcGFzdGUoaywgIiBncnVwb3MiKSkgKyAKICAgIHNjYWxlX2NvbG9yX2JyZXdlcihwYWxldHRlID0gIkRhcmsyIikgKyAKICAgIGxhYnMoeCA9ICIiLCB5ID0gIkF2YWxpYcOnw6NvIFJUIikgKyAKICAgIGNvb3JkX2ZsaXAoKQoKZ2dwbG90bHkocGxvdF9hdHJpYikgJT4lCiAgICBsYXlvdXQoYXV0b3NpemUgPSBGKQoKYGBgCgojIyBDb20gZHVhcyBkaW1lbnPDtWVzCgpgYGB7cn0KYWdydXBhbWVudG9faF8yZCA9IGZpbG1lcyAlPiUKICAgbXV0YXRlKGJpbGhldGVyaWEgPSBsb2cxMChiaWxoZXRlcmlhKSkgJT4lCiAgIG11dGF0ZV9hdCh2YXJzKCJhdmFsaWFjYW8iLCAiYmlsaGV0ZXJpYSIpLCBmdW5zKHNjYWxlKSkgJT4lCiAgIGNvbHVtbl90b19yb3duYW1lcygiZmlsbWUiKSAlPiUKICAgc2VsZWN0KCJhdmFsaWFjYW8iLCAiYmlsaGV0ZXJpYSIpICU+JQogICBkaXN0KG1ldGhvZCA9ICJldWNsaWRlYW4iKSAlPiUKICAgaGNsdXN0KG1ldGhvZCA9ICJ3YXJkLkQiKQoKZ2dkZW5kcm9ncmFtKGFncnVwYW1lbnRvX2hfMmQsIHJvdGF0ZSA9IFRSVUUsIHRoZW1lX2RlbmRybyA9IEYpCmBgYAoKYGBge3J9CmZpbG1lczIgPC0gZmlsbWVzICU+JQogICAgbXV0YXRlKGJpbGhldGVyaWEgPSBsb2cxMChiaWxoZXRlcmlhKSkKCnBsb3RhX2hjbHVzdHNfMmQoYWdydXBhbWVudG9faF8yZCwKICAgICAgICAgICAgICAgIGZpbG1lczIsCiAgICAgICAgICAgICAgICBjKCJhdmFsaWFjYW8iLCAiYmlsaGV0ZXJpYSIpLAogICAgICAgICAgICAgICAgbGlua2FnZV9tZXRob2QgPSAid2FyZC5EIiwgCiAgICAgICAgICAgICAgICBrcyA9IDE6NiwKICAgICAgICAgICAgICAgIHBhbGV0dGUgPSAiRGFyazIiKSArIAogICAgc2NhbGVfeV9sb2cxMCgpCmBgYAoKYGBge3J9CmF0cmlidWljb2VzID0gZ2V0X2dydXBvcyhhZ3J1cGFtZW50b19oXzJkLCBudW1fZ3J1cG9zID0gMTo2KQoKYXRyaWJ1aWNvZXMgPSBhdHJpYnVpY29lcyAlPiUgCiAgICBmaWx0ZXIoayA9PSAzKSAlPiUKICAgIG11dGF0ZShmaWxtZSA9IGxhYmVsKSAlPiUgCiAgICBsZWZ0X2pvaW4oZmlsbWVzLCBieSA9ICJmaWxtZSIpCgp6IDwtIGF0cmlidWljb2VzICU+JQogICAgZ2dwbG90KGFlcyh4ID0gYXZhbGlhY2FvLAogICAgICAgICAgICAgICB5ID0gYmlsaGV0ZXJpYSwKICAgICAgICAgICAgICAgY29sb3VyID0gZ3J1cG8sCiAgICAgICAgICAgICAgIHRleHQgPSBwYXN0ZSgKICAgICAgICAgICAgICAgICAgICAiRmlsbWU6IiwgZmlsbWUsCiAgICAgICAgICAgICAgICAgICAgIlxuQmlsaGV0ZXJpYToiLCBiaWxoZXRlcmlhLCJtXG4iLAogICAgICAgICAgICAgICAgICAgICJBdmFsaWHDp8OjbzoiLCBhdmFsaWFjYW8pKSkgKyAKICAgIGdlb21faml0dGVyKHdpZHRoID0gLjAyLCBoZWlnaHQgPSAwLCBzaXplID0gMywgYWxwaGEgPSAuNikgKyAKICAgIGZhY2V0X3dyYXAofiBwYXN0ZShrLCAiIGdydXBvcyIpKSArIAogICAgc2NhbGVfY29sb3JfYnJld2VyKHBhbGV0dGUgPSAiRGFyazIiKSArCiAgICBzY2FsZV95X2xvZzEwKCkKCmdncGxvdGx5KHosIHRvb2x0aXAgPSAidGV4dCIpICU+JQogICAgbGF5b3V0KGF1dG9zaXplID0gRikKYGBgCg==